setting up markdown

loading required packages

setting root path

# cas_dir = "/Volumes/psych-cog/dsnlab/TAG/"
# proj_path = "/Volumes/psych-cog/dsnlab/TAG/projects/timing_tempo_tag/"
# q_path = "/Volumes/psych-cog/dsnlab/TAG/behavior/Questionnaires/"

proj_root = "/Users/clarefmccann/University\ of\ Oregon\ Dropbox/Clare\ McCann/mine/projects/tag-projs/timing_tempo_tag/"
data_root = "/Users/clarefmccann/University\ of\ Oregon\ Dropbox/Clare\ McCann/mine/projects/tag-projs/data-tag/"

## UPDATE FILE NAMES IF NEEDED

pds_file_name = "09.2024_PDS_AllObs_Chronological.csv"
pbip_file_name = "10.2024_PBIP_AllObs_Chronological.csv"
age_file_name = "09.2024_age_long_full.csv"
pub_comp_file_name = "Allwaves_PubertyComposite_2024.05.07.csv"

loading in report data

## for puberty self-report variables

### LOAD IN PDS

pds <- read.csv(paste0(data_root, pds_file_name)) %>%
  mutate(tagid=as.factor(tagid),
         wave=as.factor(wave)) %>%
  select(tagid, wave, adrenf2, gonadf2, pdss) %>% 
  rename("adrenal_pdss" = "adrenf2",
         "gonadal_pdss" = "gonadf2")

### LOAD IN PBIP

pbip <- read.csv(paste0(data_root, pbip_file_name)) %>%
  mutate(tagid=as.factor(tagid),
         wave=as.factor(wave)) %>%
  select(tagid, wave, stage, PBIP_1A, PBIP_2A) %>% 
  rename("adrenal_pbip" = "PBIP_2A",
         "gonadal_pbip" = "PBIP_1A")

pub_comp <- read.csv(paste0(data_root, pub_comp_file_name)) %>%
  mutate(tagid=as.factor(tagid),
         wave=as.factor(wave)) %>%
  select(tagid, wave, ADRENcomp, GONADcomp, PUBcomp)

age <- read.csv(paste0(data_root, age_file_name)) %>% 
  mutate(tagid=paste0("TAG",
                      substring(tagid,first=5,last=length(tagid)))) %>% 
    filter(
    !wave == "w1s1",
    !wave == "w2s1", 
    !wave == "w3s1",
    !wave == "w4s1", 
    !wave == "w5s1",
    !wave == "w6s1"
  ) %>% 
  mutate(
    wave = ifelse(wave == "w1s2", "1",
                  ifelse(wave == "w2s2", "2",
                         ifelse(wave == "w3s2", "3",
                                ifelse(wave == "w4s2", "4",
                                       ifelse(wave == "w5s2", "5",
                                              ifelse(wave == "w6s2", "6",
                                              wave))))))) %>% 
  filter(!is.na(age))

puberty_sr <- left_join(pds, pbip, by = c("tagid", "wave"))
puberty_sr <- left_join(puberty_sr, pub_comp, by = c("tagid", "wave"))
puberty_sr <- left_join(puberty_sr, age, by = c("tagid", "wave"))

OPTION TO REMOVE REGRESSING CASES

## REMOVE REGRESSING CASES
### CHANGE PUBERTY VAR TO VARIABLE OF INTEREST

# regressing_ids <- puberty_sr %>%
#   arrange(tagid, wave) %>%
#   group_by(tagid) %>%
#   mutate(stage_diff = stage - lag(stage)) %>%
#   filter(stage_diff < 0) %>%
#   select(id) %>%
#   distinct()
# 
# puberty_sr <- puberty_sr %>% 
#   filter(!(tagid %in% regressing_ids$tagid))

mean plots

# add color for wave

### STAGE / SUMMARY

print("mean pdss at each wave")
## [1] "mean pdss at each wave"
pdss_means <- puberty_sr %>%
  filter(!is.na(pdss)) %>%
  group_by(wave) %>%
  summarize(mean = mean(pdss))

plot(pdss_means, ylim=c(1,5), 
     xlab = "wave",
     ylab = "PDSS",
     main = "plot of means overtime (PDSS)",
     pch = 16)
## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion

print("mean stage (PBIP) score at each wave")
## [1] "mean stage (PBIP) score at each wave"
stage_means <- puberty_sr %>%
  filter(!is.na(stage)) %>%
  group_by(wave) %>%
  summarize(mean = mean(stage))

plot(stage_means, ylim=c(1,5), 
     xlab = "wave",
     ylab = "stage (pbip)",
     main = "plot of means overtime (PBIP - stage)",
     pch = 16)
## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion

print("mean PUBcomp score at each wave")
## [1] "mean PUBcomp score at each wave"
PUBcomp_means <- puberty_sr %>%
  filter(!is.na(PUBcomp)) %>%
  group_by(wave) %>%
  summarize(mean = mean(PUBcomp))

plot(PUBcomp_means, ylim=c(1,5), 
     xlab = "wave",
     ylab = "pubcomp",
     main = "plot of means overtime (pubcomp)",
     pch = 16)

print("mean pdss adrenal score at each wave")
## [1] "mean pdss adrenal score at each wave"
### ADRENAL 

pdss_adren_means <- puberty_sr %>%
  filter(!is.na(adrenal_pdss)) %>%
  group_by(wave) %>%
  summarize(mean = mean(adrenal_pdss))

plot(pdss_adren_means, ylim=c(1,5), 
     xlab = "wave",
     ylab = "adrenal pdss",
     main = "plot of pdss adrenal means overtime",
     pch = 16)
## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion

print("mean pbip adrenal score at each wave")
## [1] "mean pbip adrenal score at each wave"
pbip_adrenal_means <- puberty_sr %>%
  filter(!is.na(gonadal_pbip)) %>%
  group_by(wave) %>%
  summarize(mean = mean(gonadal_pbip))

plot(pbip_adrenal_means, ylim=c(1,5), 
     xlab = "wave",
     ylab = "adrenal pbip",
     main = "plot of pbip adrenal means overtime",
     pch = 16)
## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion

print("mean pubcomp adrenal score at each wave")
## [1] "mean pubcomp adrenal score at each wave"
pubcomp_adrenal_means <- puberty_sr %>%
  filter(!is.na(ADRENcomp)) %>%
  group_by(wave) %>%
  summarize(mean = mean(ADRENcomp))

plot(pubcomp_adrenal_means, ylim=c(1,5), 
     xlab = "wave",
     ylab = "pubcomp adrenal",
     main = "plot of comp adrenal means overtime",
     pch = 16)

### GONADAL 

print("mean pdss gonadal score at each wave")
## [1] "mean pdss gonadal score at each wave"
pdss_gonad_means <- puberty_sr %>%
  filter(!is.na(gonadal_pdss)) %>%
  group_by(wave) %>%
  summarize(mean = mean(gonadal_pdss))

plot(pdss_gonad_means, ylim=c(1,5), 
     xlab = "wave",
     ylab = "gonadal PDSS",
     main = "plot of pdss gonadal means overtime",
     pch = 16)
## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion

print("mean pbip gonadal score at each wave")
## [1] "mean pbip gonadal score at each wave"
pbip_gonad_means <- puberty_sr %>%
  filter(!is.na(gonadal_pbip)) %>%
  group_by(wave) %>%
  summarize(mean = mean(gonadal_pbip))

plot(pbip_gonad_means, ylim=c(1,5), 
     xlab = "wave",
     ylab = "gonadal pbip",
     main = "plot of pbip gonadal means overtime",
     pch = 16)
## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion

print("mean comp gonadal score at each wave")
## [1] "mean comp gonadal score at each wave"
pubcomp_gonad_means <- puberty_sr %>%
  filter(!is.na(GONADcomp)) %>%
  group_by(wave) %>%
  summarize(mean = mean(gonadal_pbip))

plot(pubcomp_gonad_means, ylim=c(1,5), 
     xlab = "wave",
     ylab = "pubcomp gonadal",
     main = "plot of pubcomp gonadal means overtime",
     pch = 16)

adrenal_cor_vars <- puberty_sr[, c("adrenal_pdss", "adrenal_pbip", "ADRENcomp")]
print("printing adrenal cor matrix")
## [1] "printing adrenal cor matrix"
print(cor(adrenal_cor_vars, use = "pairwise.complete.obs"))
##              adrenal_pdss adrenal_pbip ADRENcomp
## adrenal_pdss    1.0000000    0.7765446 0.9299664
## adrenal_pbip    0.7765446    1.0000000 0.9291513
## ADRENcomp       0.9299664    0.9291513 1.0000000
gonadal_cor_vars <- puberty_sr[, c("gonadal_pdss", "gonadal_pbip", "GONADcomp")]
print("printing gonadal cor matrix")
## [1] "printing gonadal cor matrix"
print(cor(gonadal_cor_vars, use = "pairwise.complete.obs"))
##              gonadal_pdss gonadal_pbip GONADcomp
## gonadal_pdss    1.0000000    0.7040927 0.9144084
## gonadal_pbip    0.7040927    1.0000000 0.9141941
## GONADcomp       0.9144084    0.9141941 1.0000000
total_cor_vars <- puberty_sr[, c("pdss", "stage", "PUBcomp")]
print("printing total cor matrix")
## [1] "printing total cor matrix"
print(cor(total_cor_vars, use = "pairwise.complete.obs"))
##              pdss     stage   PUBcomp
## pdss    1.0000000 0.8348423 0.9455534
## stage   0.8348423 1.0000000 0.9442710
## PUBcomp 0.9455534 0.9442710 1.0000000

observed avg age at TS3 for different measures

str(puberty_sr)
## 'data.frame':    877 obs. of  14 variables:
##  $ tagid       : chr  "TAG001" "TAG001" "TAG001" "TAG001" ...
##  $ wave        : chr  "1" "1" "1" "1" ...
##  $ adrenal_pdss: int  3 3 3 3 3 5 4 5 2 4 ...
##  $ gonadal_pdss: int  2 2 2 2 5 5 5 5 3 2 ...
##  $ pdss        : num  2.5 2.5 2.5 2.5 4 5 4.5 5 2.5 3 ...
##  $ stage       : num  2.5 2.5 2.5 2.5 3.5 4 5 4.5 2 3 ...
##  $ gonadal_pbip: int  2 2 2 2 4 4 5 4 2 3 ...
##  $ adrenal_pbip: int  3 3 3 3 3 4 5 5 2 3 ...
##  $ ADRENcomp   : num  3 4 4 5 3 4.5 4.5 NA 2 3.5 ...
##  $ GONADcomp   : num  2 3 3.5 4.5 4.5 4.5 5 NA 2.5 2.5 ...
##  $ PUBcomp     : num  2.5 3.5 3.75 4.75 3.75 4.5 4.75 NA 2.25 3 ...
##  $ X           : int  2 2 2 2 4 6 8 10 14 16 ...
##  $ age         : num  12.3 12.3 12.3 12.3 13.9 ...
##  $ date        : chr  "2016-02-06" "2016-02-06" "2016-02-06" "2016-02-06" ...
puberty_sr$tagid <- as.factor(puberty_sr$tagid)

## means and stuff for stages and things 

print("observed first instance for age at TS3 by pdss")
## [1] "observed first instance for age at TS3 by pdss"
pdss_avg_age_ts3 <- puberty_sr %>%
  filter(pdss == 3) %>%              
  group_by(tagid) %>%                     
  slice_min(order_by = age, n = 1, with_ties = FALSE) %>% 
  ungroup() %>%                                 
  summarize(average_age = mean(age, na.rm = TRUE),
            min_age = min(age, na.rm = TRUE),
            max_age = max(age, na.rm = TRUE),
            n_participants = n_distinct(tagid))

print(pdss_avg_age_ts3) 
## # A tibble: 1 × 4
##   average_age min_age max_age n_participants
##         <dbl>   <dbl>   <dbl>          <int>
## 1        12.1    10.4    14.2             51
print("observed first instance for age at TS3 by stage (PBIP)")
## [1] "observed first instance for age at TS3 by stage (PBIP)"
stage_avg_age_ts3 <- puberty_sr %>%
  filter(stage == 3) %>%              
  group_by(tagid) %>%                     
  slice_min(order_by = age, n = 1, with_ties = FALSE) %>% 
  ungroup() %>%                                 
  summarize(average_age = mean(age, na.rm = TRUE),
            min_age = min(age, na.rm = TRUE),
            max_age = max(age, na.rm = TRUE),
            n_participants = n_distinct(tagid))

print(stage_avg_age_ts3)
## # A tibble: 1 × 4
##   average_age min_age max_age n_participants
##         <dbl>   <dbl>   <dbl>          <int>
## 1        12.0    10.1    13.6             59
print("observed first instance for age at TS3 by pubcomp")
## [1] "observed first instance for age at TS3 by pubcomp"
pubcomp_avg_age_ts3 <- puberty_sr %>%
  filter(PUBcomp == 3) %>%              
  group_by(tagid) %>%                     
  slice_min(order_by = age, n = 1, with_ties = FALSE) %>% 
  ungroup() %>%                                 
  summarize(average_age = mean(age, na.rm = TRUE),
            min_age = min(age, na.rm = TRUE),
            max_age = max(age, na.rm = TRUE),
            n_participants = n_distinct(tagid))

print(pubcomp_avg_age_ts3) 
## # A tibble: 1 × 4
##   average_age min_age max_age n_participants
##         <dbl>   <dbl>   <dbl>          <int>
## 1        12.3    10.7    13.5             27
print("observed first instance for age at TS3 by PDSS adrenal score")
## [1] "observed first instance for age at TS3 by PDSS adrenal score"
pdss_adrenal_avg_age_ts3 <- puberty_sr %>%
  filter(adrenal_pdss == 3) %>%              
  group_by(tagid) %>%                     
  slice_min(order_by = age, n = 1, with_ties = FALSE) %>% 
  ungroup() %>%                                 
  summarize(average_age = mean(age, na.rm = TRUE),
            min_age = min(age, na.rm = TRUE),
            max_age = max(age, na.rm = TRUE),
            n_participants = n_distinct(tagid))

print(pdss_adrenal_avg_age_ts3) 
## # A tibble: 1 × 4
##   average_age min_age max_age n_participants
##         <dbl>   <dbl>   <dbl>          <int>
## 1        12.2    10.2    16.1             84
print("observed first instance for age at TS3 by pbip adrenal score")
## [1] "observed first instance for age at TS3 by pbip adrenal score"
pbip_adrenal_avg_age_ts3 <- puberty_sr %>%
  filter(adrenal_pbip == 3) %>%              
  group_by(tagid) %>%                     
  slice_min(order_by = age, n = 1, with_ties = FALSE) %>% 
  ungroup() %>%                                 
  summarize(average_age = mean(age, na.rm = TRUE),
            min_age = min(age, na.rm = TRUE),
            max_age = max(age, na.rm = TRUE),
            n_participants = n_distinct(tagid))

print(pbip_adrenal_avg_age_ts3) 
## # A tibble: 1 × 4
##   average_age min_age max_age n_participants
##         <dbl>   <dbl>   <dbl>          <int>
## 1        12.2    10.1    14.8             79
print("observed first instance for age at TS3 by pubcomp adrenal score")
## [1] "observed first instance for age at TS3 by pubcomp adrenal score"
pubcomp_adrenal_avg_age_ts3 <- puberty_sr %>%
  filter(ADRENcomp == 3) %>%              
  group_by(tagid) %>%                     
  slice_min(order_by = age, n = 1, with_ties = FALSE) %>% 
  ungroup() %>%                                 
  summarize(average_age = mean(age, na.rm = TRUE),
            min_age = min(age, na.rm = TRUE),
            max_age = max(age, na.rm = TRUE),
            n_participants = n_distinct(tagid))

print(pubcomp_adrenal_avg_age_ts3) 
## # A tibble: 1 × 4
##   average_age min_age max_age n_participants
##         <dbl>   <dbl>   <dbl>          <int>
## 1        12.1    10.2    14.8             43
print("observed first instance for age at TS3 by PDSS gonadal score")
## [1] "observed first instance for age at TS3 by PDSS gonadal score"
pdss_gonadal_avg_age_ts3 <- puberty_sr %>%
  filter(gonadal_pdss == 3) %>%              
  group_by(tagid) %>%                     
  slice_min(order_by = age, n = 1, with_ties = FALSE) %>% 
  ungroup() %>%                                 
  summarize(average_age = mean(age, na.rm = TRUE),
            min_age = min(age, na.rm = TRUE),
            max_age = max(age, na.rm = TRUE),
            n_participants = n_distinct(tagid))

print(pdss_gonadal_avg_age_ts3)
## # A tibble: 1 × 4
##   average_age min_age max_age n_participants
##         <dbl>   <dbl>   <dbl>          <int>
## 1        11.8    10.0    15.3             93
print("observed first instance for age at TS3 by pubcomp gonadal score")
## [1] "observed first instance for age at TS3 by pubcomp gonadal score"
pubcomp_gonadal_avg_age_ts3 <- puberty_sr %>%
  filter(GONADcomp == 3) %>%              
  group_by(tagid) %>%                     
  slice_min(order_by = age, n = 1, with_ties = FALSE) %>% 
  ungroup() %>%                                 
  summarize(average_age = mean(age, na.rm = TRUE),
            min_age = min(age, na.rm = TRUE),
            max_age = max(age, na.rm = TRUE),
            n_participants = n_distinct(tagid))

print(pubcomp_gonadal_avg_age_ts3)
## # A tibble: 1 × 4
##   average_age min_age max_age n_participants
##         <dbl>   <dbl>   <dbl>          <int>
## 1        12.0    10.1    14.9             57

viewing the raw data

## visualizing observations

print("observed PDSS trajectories")
## [1] "observed PDSS trajectories"
ggplot(puberty_sr, aes(x = age, y = pdss)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 246 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 246 rows containing missing values or values outside the scale range
## (`geom_point()`).

print("observed stage (PBIP) trajectories")
## [1] "observed stage (PBIP) trajectories"
ggplot(puberty_sr, aes(x = age, y = stage)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 249 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 249 rows containing missing values or values outside the scale range
## (`geom_point()`).

print("observed pubcomp trajectories")
## [1] "observed pubcomp trajectories"
ggplot(puberty_sr, aes(x = age, y = PUBcomp)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 250 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 250 rows containing missing values or values outside the scale range
## (`geom_point()`).

print("observed pdss adrenal trajectories")
## [1] "observed pdss adrenal trajectories"
ggplot(puberty_sr, aes(x = age, y = adrenal_pdss)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 235 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 235 rows containing missing values or values outside the scale range
## (`geom_point()`).

print("observed pbip adrenal trajectories")
## [1] "observed pbip adrenal trajectories"
ggplot(puberty_sr, aes(x = age, y = adrenal_pbip)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 249 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 249 rows containing missing values or values outside the scale range
## (`geom_point()`).

print("observed pubcomp adrenal trajectories")
## [1] "observed pubcomp adrenal trajectories"
ggplot(puberty_sr, aes(x = age, y = ADRENcomp)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 248 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 248 rows containing missing values or values outside the scale range
## (`geom_point()`).

print("observed pdss gonadal trajectories")
## [1] "observed pdss gonadal trajectories"
ggplot(puberty_sr, aes(x = age, y = gonadal_pdss)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 246 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 246 rows containing missing values or values outside the scale range
## (`geom_point()`).

print("observed pbip gonadal trajectories")
## [1] "observed pbip gonadal trajectories"
ggplot(puberty_sr, aes(x = age, y = gonadal_pbip)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 245 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 245 rows containing missing values or values outside the scale range
## (`geom_point()`).

print("observed pubcomp gonadal trajectories")
## [1] "observed pubcomp gonadal trajectories"
ggplot(puberty_sr, aes(x = age, y = GONADcomp)) +
  geom_point() +
  geom_smooth(method = "loess")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 249 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 249 rows containing missing values or values outside the scale range
## (`geom_point()`).

data preparation

## getting a better idea of the data we have 

obs_per_participant <- puberty_sr %>%
  group_by(tagid) %>%
  summarise(observations = n()) %>%
  ungroup()

obs_distribution <- obs_per_participant %>%
  group_by(observations) %>%
  summarise(participants_count = n())

print(obs_distribution)
## # A tibble: 9 × 2
##   observations participants_count
##          <int>              <int>
## 1            1                 53
## 2            2                 17
## 3            3                 25
## 4            4                 39
## 5            5                 42
## 6            6                 23
## 7            7                 10
## 8            8                 12
## 9            9                  5
## creating a mean centered age variable but right now the model is using original age which i think is what we want, right? 

puberty_sr <- puberty_sr %>% 
  filter(!is.na(age) & !is.na(tagid)) %>% 
  mutate(age_cen = age - mean(age))

THE LOOP

### choose what variables you want to use

set.seed(90025)

print("interpretation of values: lambda is timing (or age at peak growth / TS3), alpha is tempo (or rate of change)")
## [1] "interpretation of values: lambda is timing (or age at peak growth / TS3), alpha is tempo (or rate of change)"
print("positive or larger values for lambda = earlier timing, positive or larger values for alpha = slower tempo")
## [1] "positive or larger values for lambda = earlier timing, positive or larger values for alpha = slower tempo"
puberty_vars <- c("pdss", "stage", "PUBcomp", "adrenal_pbip", "gonadal_pbip", "GONADcomp")
## model does not converge with PDSS or PUBcomp adrenal scores, or PDSS gonadal score

# logistic model with random effects
logistic_model <- function(age, b0, b1, alpha, lambda) {
  b0 + (b1 - b0) * (1 / (1 + exp(-alpha * (age - lambda))))
}

start_vals <- list(fixed = c(alpha = 0.95, lambda = 13.1))

results_list <- list()

for (var in puberty_vars) {
  print(paste0("fitting model for: ", var))
  
  puberty_sr_tmp <- puberty_sr %>%
    rename(puberty = !!sym(var)) %>%
    filter(!is.na(puberty) & !is.na(age) & !is.na(tagid))

  fit <- nlme(puberty ~ logistic_model(age, 1, 5, alpha, lambda),
              data = puberty_sr_tmp,
              fixed = alpha + lambda ~ 1,
              random = pdDiag(alpha + lambda ~ 1),
              groups = ~ tagid,
              start = start_vals,
              method = "ML")

  fitted_values <- fitted(fit)
  individual_estimates <- ranef(fit)
  individual_estimates_df <- data.frame(id = rownames(individual_estimates), individual_estimates) %>%
    rename("tagid" = "id")
  
  puberty_plot <- cbind(fitted_values, puberty_sr_tmp)
  puberty_plot <- left_join(puberty_plot, individual_estimates_df, by = "tagid")
  
  fitted_values_plot <- ggplot(data = puberty_plot, aes(x = age, y = fitted_values, color = tagid)) +
    geom_point(aes(y = fitted_values), size = 1, shape = 16) +  
    geom_line(aes(y = fitted_values), size = 0.5, linetype = "solid") +
    labs(
      title = paste("predicted values -", var),
      x = "age",
      y = "predicted pubertal stage",
      color = "tagid"
    ) +
    theme_minimal() +
    theme(legend.position = "none")
  
  print(paste0("printing fitted values plot for", var))
  
  print(fitted_values_plot)
  
  results_list[[var]] <- list(
    fit = fit,
    fitted_values = fitted_values,
    individual_estimates = individual_estimates,
    plot = fitted_values_plot
  )
  
  
  # write.csv(puberty_wide, paste0("puberty_wide_", var, ".csv"))
  # write.csv(puberty_plot, paste0("puberty_plot_", var, ".csv"))
}
## [1] "fitting model for: pdss"
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## [1] "printing fitted values plot forpdss"

## [1] "fitting model for: stage"
## [1] "printing fitted values plot forstage"

## [1] "fitting model for: PUBcomp"
## Warning in nlme.formula(puberty ~ logistic_model(age, 1, 5, alpha, lambda), :
## Iteration 1, LME step: nlminb() did not converge (code = 1). PORT message:
## false convergence (8)
## [1] "printing fitted values plot forPUBcomp"

## [1] "fitting model for: adrenal_pbip"
## [1] "printing fitted values plot foradrenal_pbip"

## [1] "fitting model for: gonadal_pbip"
## [1] "printing fitted values plot forgonadal_pbip"

## [1] "fitting model for: GONADcomp"
## [1] "printing fitted values plot forGONADcomp"

all_estimates <- NULL

for (var in names(results_list)) {
  estimates <- results_list[[var]]$individual_estimates
  estimates_df <- data.frame(tagid = rownames(estimates), estimates, row.names = NULL)
  
  colnames(estimates_df)[-1] <- paste0(var, "_", colnames(estimates_df)[-1])
  
  if (is.null(all_estimates)) {
    all_estimates <- estimates_df
  } else {
    all_estimates <- full_join(all_estimates, estimates_df, by = "tagid")
  }
}


write.csv(all_estimates, paste0(proj_root, "TAG_timing_tempo_04.2025.csv"), row.names = FALSE)

testing different lambda and alpha start values

##### CHANGE IN STARTING VALUES DID NOT SIGNIFICANTLY IMPACT MODEL FIT OR ESTIMATES

# set.seed(12000)
# 
# ### remove a few observations and see if results hold 
# 
# logistic_model <- function(age, b0, b1, alpha, lambda) {
# b0 + (b1 - b0) * (1 / (1 + exp(-alpha * (age - lambda)))) }
# 
# 
# fit_logistic_model <- function(data, alpha_start, lambda_start) {
# 
#   start_vals <- list(fixed = c(alpha = alpha_start, lambda = lambda_start))
#   
# 
#   fit <- tryCatch(
#     {
#       nlme(
#         pdss ~ logistic_model(age, 1, 5, alpha, lambda),
#         data = puberty_sr,
#         fixed = alpha + lambda ~ 1,
#         random = pdDiag(alpha + lambda ~ 1),
#         groups = ~ tagid,
#         start = start_vals,
#         method = "ML"
#       )
#     },
#     error = function(e) NULL
#   )
#   
#   return(fit)
# }
# 
# 
# ## played around with a few different ranges, the model seems to be sensitive to this (won't converge if the range is too big)
# alpha_range <- seq(0.7, 1.0, by = 0.05)
# lambda_range <- seq(12, 13.5, by = 0.1) 
# 
# 
# results <- data.frame(
#   alpha = numeric(),
#   lambda = numeric(),
#   logLik = numeric(),  
#   AIC = numeric(),     
#   BIC = numeric()     
# )
# 
# 
# for (alpha_val in alpha_range) {
#   for (lambda_val in lambda_range) {
#     fit <- fit_logistic_model(puberty_sr, alpha_val, lambda_val)
#     
#     if (!is.null(fit)) {
# 
#       results <- results %>%
#         add_row(
#           alpha = alpha_val,
#           lambda = lambda_val,
#           logLik = as.numeric(logLik(fit)),
#           AIC = AIC(fit),
#           BIC = BIC(fit)
#         )
#     }
#   }
# }
# 
# best_model <- results %>% arrange(AIC) %>% slice(1)
# 
# 
# print(results)
# 
# # best combination of alpha and lambda
# print(best_model) ## alpha = 0.95, lambda = 12.1 // now it's giving me a different combo, 0.95 and 13.1... i think its bcz i messed with the range of start values to try and added back in random slope bcz it was converging with the new range (i just made the range a bit smaller)